# Preface ======================================================================

## The following is a replication file for:
### Brew, Brian. "Teaching Electoral Institutions Using In-Class Simulations." 
### PS: Political Science and Politics. 2024.

## The paper associated with these datasets was not a heavily quantitative
## project. The quantitative analysis that featured into it was very straight-
## forward, and was done in Microsoft Excel. This R script replicates those
## results. The datasets themselves include the initial analysis -- as such,
## this code removes certain columns and rows.

## As mentioned in the README file, please make sure that you have downloaded
## the files in their original (CSV) format.

## As a final note -- the Excel code that was used in the initial analysis was
## removed from the CSV files posted to Dataverse in order to minimize the 
## amount of cleaning required in this R script. The coding process in Excel
## was straightforward -- the =AVERAGE(), =MEDIAN(), and = SUM() functions were
## used on appropriate columns to generate the values which are generated by 
## this code script.

# Loading Data and Required Packages ===========================================

## install dplyr and tidyr if not already installed:
## install.packages("dplyr")
## install.packages("tidyr")
library(dplyr)
library(tidyr)

## set working directory

#setwd("")

### Set working directory to the appropriate folder using the setwd() command
### and the appropriate filepath.

pre.post.dat <- read.csv("POLI 208 Spring 2024 Simulation Pre-Post Test Response Scores, Anonymized.csv")
survey.dat <- read.csv("POLI 208 Spring 2024 Simulation Survey_March 27, 2024_09.26.csv")
exam.2023.dat <- read.csv("POLI 208 Spring 2023 Exam Grade Summary, Anonymized.csv")
exam.2024.dat <- read.csv("POLI 208 Spring 2024 Exam Grade Summary, Anonymized.csv")

# Pre- and Post-Test ===========================================================

# The following code replicates the quantitative analysis performed in Excel
# for the pre- and post-test scores.

# remove students who missed either the pre- or post-test

pre.post.dat <- pre.post.dat %>%
  filter_all(all_vars(!(. %in% c("Absent", "Excused"))))

# separate out students who missed the day of the Iowa Caucus (see App. E)
pre.post.dat.missedIA <- pre.post.dat[pre.post.dat$missed_IA_day == 1, ]
pre.post.dat <- pre.post.dat[pre.post.dat$missed_IA_day == 0, ]

# Convert Integer and Character vars. to Numeric
pre.post.dat <- pre.post.dat %>%
  mutate_at(vars(contains("PRE")), as.numeric)
pre.post.dat <- pre.post.dat %>%
  mutate_at(vars(contains("POST")), as.numeric)
pre.post.dat.missedIA <- pre.post.dat.missedIA %>%
  mutate_at(vars(contains("PRE")), as.numeric)
pre.post.dat.missedIA <- pre.post.dat.missedIA %>%
  mutate_at(vars(contains("POST")), as.numeric)

## Table 1 Values ==============================================================

## This code generates the mean and median 
# Grab columns of interest
columns_of_interest <- pre.post.dat %>%
  select(starts_with("IA") & ends_with("_PRE"), 
         starts_with("IA") & ends_with("_POST"))

# Calculate mean and median for each column
summary_stats <- columns_of_interest %>%
  summarise_all(list(mean = ~mean(.x, na.rm = TRUE), 
                     median = ~median(.x, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), 
               names_to = c("Column", "Pre or Post", "Statistic"), 
               names_sep = "_") %>%
  mutate(`Statistic` = ifelse(grepl("mean", Statistic), "Mean", "Median"))

# Print the resulting data
print(summary_stats, n=24)

## Appendix E values ===========================================================

## This code generates the mean and median
# Grab columns of interest
columns_of_interest.missedIA <- pre.post.dat.missedIA %>%
  select(starts_with("IA") & ends_with("_PRE"), 
         starts_with("IA") & ends_with("_POST"))

# Calculate mean and median for each column
summary_stats.missedIA <- columns_of_interest.missedIA %>%
  summarise_all(list(mean = ~mean(.x, na.rm = TRUE), 
                     median = ~median(.x, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), 
               names_to = c("Column", "Pre or Post", "Statistic"), 
               names_sep = "_") %>%
  mutate(`Statistic` = ifelse(grepl("mean", Statistic), "Mean", "Median"))

# Print the resulting data
print(summary_stats.missedIA, n=24)

# Survey Analysis ==============================================================

## No coding was done in Excel for these.
## The dataset survey.dat may be used for reference, but the original CSV file
## is likely more useful as a reference.

# Exam Grade Analysis ==========================================================
## Spring 2023 =================================================================
E2023 <- exam.2023.dat
## This duplicates the original dataset, and allows for modifications that will
## yield the same results while retaining the original as a reference.
# Remove the last (notes) column
E2023_cleaned <- E2023[ , -ncol(E2023)]   
## Remove the bottom three rows -- these contain the results of Excel formulas
E2023_cleaned <- E2023_cleaned[1:(nrow(E2023_cleaned) - 3), ]  
#print(sapply(E2023_cleaned, class))

E2023_cleaned_summary <- E2023_cleaned %>%
  summarise(across(2:8, list(
    mean = ~ mean(. , na.rm = TRUE),
    median = ~ median(. , na.rm = TRUE),
    na_count = ~ sum(is.na(.))
  ), .names = "{col}_{fn}")) %>%
  pivot_longer(cols = everything(),
               names_to = c(".value", "variable"),
               names_sep = "_")
View(E2023_cleaned_summary)
# The values here are those that appear in Appendix G1

## Spring 2024 =================================================================
E2024 <- exam.2024.dat
## This duplicates the original dataset, and allows for modifications that will
## yield the same results while retaining the original as a reference.
# Remove the last column
E2024_cleaned <- E2024[ , -ncol(E2024)]   
# Remove the bottom three rows
E2024_cleaned <- E2024_cleaned[1:(nrow(E2024_cleaned) - 3), ]  
#print(sapply(E2024_cleaned, class))

E2024_cleaned_summary <- E2024_cleaned %>%
  summarise(across(2:8, list(
    mean = ~ mean(. , na.rm = TRUE),
    median = ~ median(. , na.rm = TRUE),
    na_count = ~ sum(is.na(.))
  ), .names = "{col}_{fn}")) %>%
  pivot_longer(cols = everything(),
               names_to = c(".value", "variable"),
               names_sep = "_")
View(E2024_cleaned_summary)
# The values here are those that appear in Appendix G2